Load data: 246 subjects with a total of 845 goals are included in the following analysis

# Read the prepared inputs for the baseline analyses ----

# goal-level ratings in long format (one row per goal x rating variable)
goalRating_long_R <- read.csv(
  here("Baseline", "Inputs", "goalRating_long_R.csv"),
  stringsAsFactors = FALSE
)

# subject-level data (one row per subject)
wideDf <- read.csv(
  here("Baseline", "Inputs", "wideDf.csv"),
  stringsAsFactors = FALSE
)

# individual-difference measures
indivDiffDf <- read.csv(
  here("Baseline", "Inputs", "indivDiffDf.csv"),
  stringsAsFactors = FALSE
)

# listed goal text
goalListDf <- read.csv(
  here("Baseline", "Outputs", "listedGoals.csv"),
  stringsAsFactors = FALSE
)

# goal ratings in wide format (one row per goal)
goalDf_wide <- read.csv(here("Baseline", "Outputs", "goalDf_wide.csv"))

Data Screening for goal representation assessment

Missing data

Check the number of missing data per variable; below are the top 5 variables. Missing data is rare for all variables except for external_importance (5% of the goals had missing data), because there was a glitch in the display logic for one of the goal blocks.

# check the number of missing data
# 41 = number of rating rows per goal in the long format, so nrow/41 gives
# the number of goals (845) -- TODO confirm 41 against the survey codebook
totalGoal <- nrow(goalRating_long_R)/41

# count NA ratings per variable, express as a fraction of all goals,
# and show the 5 variables with the most missing data
goalRating_long_R %>%
  filter(is.na(rating)) %>%
  tabyl(variable) %>%
  mutate(percent = n/totalGoal) %>%
  arrange(desc(percent)) %>%
  head(5)
##                    variable  n     percent
##         external_importance 42 0.049704142
##     end_state_specificity_R  6 0.007100592
##  attractiveness_achievement  3 0.003550296
##                  commitment  2 0.002366864
##                 commonality  2 0.002366864

The “I’m not sure” response

The “construal_level”, “approach_avoidance” and “attainment_maintenance” questions have an option for “I’m not sure” because they ask subjects to categorize their goals.

around 1% of the goals had “I’m not sure” as the response.

# check the number of "I'm not sure" responses (coded 99) per variable
goalRating_long_R %>%
  filter(rating == 99) %>%
  tabyl(variable) %>%
  mutate(percent = n/totalGoal) %>%
  arrange(desc(percent))
##                  variable  n    percent
##  attainment_maintenance_R 13 0.01538462
##      approach_avoidance_R 11 0.01301775
##           construal_level 11 0.01301775

The “not specified” response

The temporal_duration, frequency and end_state_specificity questions have an option for “not specified” because they ask about features that may not be applicable to all goals.

around 5% of the responses are “not specified”

# check the number of "not specified" responses (coded 999) per variable
goalRating_long_R %>%
  filter(rating == 999) %>%
  tabyl(variable) %>%
  arrange(desc(percent = n/totalGoal)) %>%
  arrange(desc(percent))
##                 variable  n    percent
##        temporal_duration 41 0.04852071
##  end_state_specificity_R 40 0.04733728
##              frequency_R 25 0.02958580

Transform all special cases to NAs

All “I’m not sure” and “not specified” responses will be treated as missing data.

# Recode the special response codes 99 ("I'm not sure") and 999
# ("not specified") to NA. `%in%` is used instead of `rating == 99 |
# rating == 999` so the logical mask never contains NA for ratings
# that are already missing (`==` propagates NA; `%in%` returns FALSE).
goalRating_long_R <- goalRating_long_R %>% 
  mutate(rating = replace(rating, rating %in% c(99, 999), NA))

The number of claimed goals

Descriptive on the number of goals subject claimed to have prior to listing them (lower than the previous mTurk study: mean = 4.4)

# descriptives for the number of goals subjects claimed to have
describe(wideDf$total_goal)
##    vars   n mean  sd median trimmed  mad min max range skew kurtosis   se
## X1    1 246 3.68 2.9      3    3.31 1.48   1  40    39 8.47    98.79 0.18

Visualize the number of claimed goals per subject after excluding the extreme value (> 20) (1 claimed 40)

# Histogram of claimed goals per subject, excluding the extreme case
# (one subject claimed 40 goals).
breaks <- 1:20
wideDf %>%
  filter(total_goal < 20) %>%
  ggplot(aes(x = total_goal)) +
  geom_histogram(binwidth = 1,
                 fill = "orange",
                 colour = "black") +
  scale_x_continuous(breaks = breaks,
                     labels = scales::comma(breaks, accuracy = 1)) +
  labs(x = "Number of claimed goals", y = "# of participants") +
  theme_classic(base_size = 18)

The percentage of subjects who claimed having more than 5 goals: 9.8%

# get the number of total subjects
totalSub <- nrow(indivDiffDf)

# proportion of subjects claiming more than 5 goals
length(wideDf$total_goal[wideDf$total_goal>5])/totalSub
## [1] 0.09756098

Descriptive on the number of goals participants actual listed (similar to all previous study), but the distribution was different (on previous mTurk data, 5-goal had the highest frequency).

# descriptives for the number of goals actually listed (observed range 1-5)
describe(wideDf$listNum)
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 246 3.43 1.14      3    3.46 1.48   1   5     4 0.01    -0.92 0.07
# histogram of listed goals per subject
breaks <- (1:5)
wideDf %>% 
  ggplot(aes(x = listNum)) + 
  scale_x_continuous(labels=scales::comma(breaks, accuracy = 1), breaks=seq(1, 5, by = 1)) + 
  geom_histogram(fill = "orange", 
                 colour = "black",
                 binwidth = 1) + 
  labs(x="Number of listed goals", y="# of participants") +
  theme_classic(base_size = 18) 

number of people who listed 1 goal: 9 (less than previous)

# number of subjects who listed exactly 1 goal
length(wideDf$listNum[wideDf$listNum == 1])
## [1] 9

Descriptive statistics on the difference between the number of claimed goals and listed goals (after excluding the extreme case)

# difference between claimed and listed goals
# (positive = claimed more than listed)
wideDf <-wideDf %>%
  mutate(diffNum = total_goal - listNum)

# drop the extreme case (the subject who claimed 40 goals)
goalDf_sum_wide_clean <- wideDf %>%filter(total_goal < 20)
  
describe(goalDf_sum_wide_clean$diffNum)
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 245 0.11 1.24      0    0.01   0  -4  10    14 3.61    24.47 0.08
# histogram of the claimed-minus-listed difference
breaks <- (-4:15)
goalDf_sum_wide_clean %>% 
  ggplot(aes(x = diffNum)) + 
  scale_x_continuous(labels=scales::comma(breaks, accuracy = 1), breaks=breaks) + 
  geom_histogram(fill = "orange", 
                 colour = "black",
                 binwidth = 1) + 
  labs(x="Number of claimed goals - listed goals", y="# of participants") +
  theme_classic(base_size = 18) 

percentage of people who listed more goals than they claimed: 13% (less than previous mTurk)

# percentage of subjects who listed MORE goals than they claimed (diffNum < 0)
length(wideDf$diffNum[wideDf$diffNum <0])/totalSub *100
## [1] 13.82114

percentage of people who listed fewer goals than they claimed: 14%

# percentage of subjects who listed FEWER goals than they claimed (diffNum > 0)
length(wideDf$diffNum[wideDf$diffNum >0])/totalSub *100
## [1] 14.63415

Goal Representation Ratings

Descriptive stats

# descriptive stats for each variable, sorted by skewness
# (most negatively skewed first)
mTurk_b_descriptive <- goalRating_long_R %>%
  dplyr::select(variable, rating) %>%
  group_by(variable) %>%
  summarize(mean = mean(rating, na.rm = TRUE),
            sd = sd(rating, na.rm = TRUE), 
            n = n(),
            min = min(rating, na.rm = TRUE),
            max = max(rating, na.rm = TRUE),
            skew = skew(rating, na.rm = T), 
            kurtosi = kurtosi(rating, na.rm = T)
            ) %>%
  arrange(skew)

#write.csv(goalRating_long_R, here("Baseline", "Outputs", "goalRating_long_R.csv"), row.names = F)

# render the descriptives as an HTML table
mTurk_b_descriptive %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
variable mean sd n min max skew kurtosi
approach_avoidance_R 6.224221 1.5452637 845 1 7 -2.1727849 3.8432880
control 6.253855 1.1856600 845 1 7 -1.9338349 3.8044055
specificity 6.055687 1.3898389 845 1 7 -1.6669884 2.2108652
identified_motivation 6.215385 1.1115507 845 1 7 -1.5736792 2.4322229
measurability 6.098341 1.3091691 845 1 7 -1.4236411 1.3002904
clarity 6.082840 1.0798681 845 1 7 -1.2753662 1.5490704
attractiveness_achievement 6.042755 1.0231917 845 1 7 -1.2428597 2.0018518
self_resources 6.071174 1.0631651 845 1 7 -1.2018394 1.5139371
importance 6.127811 1.1623010 845 2 7 -1.1990699 0.5219177
commitment 6.111506 1.1354357 845 2 7 -1.1282786 0.3001185
ideal_motivation 5.571598 1.5971384 845 1 7 -1.1191065 0.5972925
social_desirability 5.865955 1.2755013 845 1 7 -1.0896227 0.8126483
initial_time_R 6.137278 1.7524782 845 1 8 -1.0527170 0.8437848
basic_needs 5.381065 1.8502264 845 1 7 -1.0309408 -0.0461374
attractiveness_progress 5.940828 1.0274903 845 1 7 -0.9945787 1.0384554
implementation_intention 5.621590 1.3803366 845 1 7 -0.9066333 0.2110371
regret 5.572275 1.4856335 845 1 7 -0.8200699 -0.1315040
temporal_duration 3.116915 0.6924337 845 1 4 -0.7205211 1.0806291
commonality 5.026097 1.8880251 845 1 7 -0.7175472 -0.5650281
end_state_specificity_R 2.315394 0.8514477 845 1 3 -0.6497471 -1.3114261
instrumentality 5.036730 1.8725277 845 1 7 -0.6476131 -0.6762803
attainability 8.223669 2.0712220 845 2 12 -0.5713721 -0.1369239
other_resources 4.795266 1.7671238 845 1 7 -0.5505078 -0.5271092
meaningfulness 4.940758 1.7551949 845 1 7 -0.4488154 -0.7473833
affordance 5.268639 1.4399591 845 1 7 -0.4303517 -0.6494085
urgency 4.931361 1.6145146 845 1 7 -0.4220002 -0.5668751
external_importance 4.570361 2.0791011 845 1 7 -0.3385082 -1.1988098
attainment_maintenance_R 4.484375 2.4479656 845 1 7 -0.3282102 -1.5504706
visibility 4.432977 2.0897224 845 1 7 -0.2851378 -1.1801299
difficulty 5.042604 1.4918683 845 1 7 -0.2737788 -0.6725020
effort 4.882701 1.6071413 845 1 7 -0.2612870 -0.8099205
failure 1.979882 0.9227624 845 1 3 0.0397350 -1.8258040
construal_level 3.990396 1.9999769 845 1 7 0.0555066 -1.1889644
connectedness 3.856465 1.9621212 845 1 7 0.0639689 -1.1150778
introjected_motivation 3.623669 2.1497070 845 1 7 0.1725515 -1.3866973
procrastination 3.571090 1.7645294 845 1 7 0.2363466 -0.8945785
intrinsic_motivation 3.511848 2.1328592 845 1 7 0.2576867 -1.3216728
external_motivation 3.305687 2.1323728 845 1 7 0.3857426 -1.2700443
frequency_R 1.364634 0.4816212 845 1 2 0.5614403 -1.6868374
ought_motivation 2.725444 1.9593931 845 1 7 0.8769575 -0.5325835
conflict 2.189798 1.4692543 845 1 7 1.2706138 0.9993013
# order based on their skewness 
#kable(varDf[order(varDf$skew),])

#mTurk_b_descriptive <- write.csv(mTurk_b_descriptive, here("Baseline", "Outputs", "mTurk_b_descriptive.csv"), row.names = F)

The trend looks more skewed than previous MTurk data

# histograms for each dimension (default bin count -- the `bins = 30`
# message below is expected)
goalRating_long_R %>%
  ggplot(aes(x = rating)) +
    geom_histogram(fill   = "orange",
                   colour = "black",
                   alpha  = .6) +
    facet_wrap(~variable, nrow = 7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

correlational matrix across all variables

“pairwise.complete.obs” is used for generating correlation matrix.The correlations make sense

# transform the long format to wide format (one column per rating variable)
goalDf_wide <- goalRating_long_R %>% spread (variable, rating)

# generate a correlation matrix (pairwise-complete observations)
corrM_all <- goalDf_wide %>% 
  dplyr :: select(affordance:visibility) %>% 
  cor(use = "pairwise.complete.obs")

# visualization (upper triangle, AOE ordering, coefficients printed in black)
corrplot(corrM_all, method = "circle",number.cex = .7, order = "AOE", addCoef.col = "black",type = "upper",col= colorRampPalette(c("midnightblue","white", "orange"))(200))

# write the wide format
write.csv(goalDf_wide, here("Baseline", "Outputs", "goalDf_wide.csv"), row.names = F)

# attach the ratings to the listed goal text and export
left_join(goalListDf, goalDf_wide, by = c("MTurkCode","goal_order" = "goal")) %>%
  write.csv(., here("Baseline", "Outputs", "goalList_rating.csv"),row.names = F)

#write.csv(corrM_all, here("Baseline", "Outputs", "mTurk_b_corrM_all.csv"), row.names = F)

Variance Partition

Only the 31 variables for goal representation are included. Only around 6.7% of the variance is on the between subject level. (less than previous mTurk data)

# subset the long-format dataset to the goal representation variables
# (drop the 10 goal-striving variables listed below)
goal_striving <- c("commitment", "urgency", "effort", "initial_time_R", "regret", "procrastination", "failure", "self_resources", "other_resources", "implementation_intention")
goalDf_R_long <- goalRating_long_R[!goalRating_long_R$variable %in% goal_striving,]

# generate a multilevel model with subject as the random intercept
mlm <-lmer(rating ~ variable + (1|MTurkCode), data = goalDf_R_long)

# calculate the variance partition coefficient and transform to ICC
# (proportion of rating variance at the between-subject vs. residual level)
VarCorr(mlm) %>%
  as_tibble() %>%
  mutate(icc=vcov/sum(vcov)) %>%
  dplyr :: select(grp, icc)
## # A tibble: 2 x 2
##   grp          icc
##   <chr>      <dbl>
## 1 MTurkCode 0.0671
## 2 Residual  0.933
# same quantities kept as a data frame
Raw <- VarCorr(mlm) %>%
  as_tibble() %>%
  mutate(Raw=vcov/sum(vcov)) %>%
  dplyr :: select(Raw)

# just for paper output
mTurk_b_icc <- VarCorr(mlm) %>%
  as_tibble() %>%
  mutate(mTurk_b_icc=vcov/sum(vcov)) %>%
  dplyr :: select(mTurk_b_icc)

mTurk_b_icc$variation <- c("between-subject", "residual")

# write.csv(mTurk_b_icc, here("Baseline", "Outputs", "mTurk_b_icc.csv"), row.names = F)

item wise ICC

# Fit an intercept-only multilevel model for a single rating variable and
# return the proportion of variance at each level (group, then residual).
#
# var      numeric vector of ratings for one variable
# groupVar grouping vector (subject IDs); note that `var` and `groupVar`
#          are resolved from this function's environment by lmer(), since
#          neither name is a column of `data`
# data     data frame passed to lmer()
#
# Returns a one-column tibble named `outputDf` with one row per variance
# component. (The original assigned the result to a local variable, which
# made the return value invisible; the pipeline is now the last expression
# so the value is returned visibly.)
varPartition <- function(var, groupVar, data){
  mlm <- lmer(var ~ 1 + (1|groupVar), data = data)

  VarCorr(mlm) %>%
    as_tibble() %>%
    mutate(outputDf = vcov/sum(vcov)) %>%
    dplyr::select(outputDf)
}

# apply the function to each variable after excluding subjects with just
# 1 goal (no within-subject variance to partition for a single goal)
df_cleaned <- goalDf_wide %>%
  filter(total_goal != 1) %>%
  select(-goal, -listNum, -total_goal, -goalType)


# per-item ICC: run the intercept-only model for every rating column, then
# reshape the list of one-column outputs into an Items x ICC table
# (the first row after t() is the between-subject component, kept as V1)
mTurk_b_item_ICC <- df_cleaned %>%
  select(-MTurkCode) %>%
  map(~varPartition(., df_cleaned$MTurkCode, df_cleaned)) %>%
  as_tibble() %>%
  t() %>%
  as.data.frame() %>%
  select(V1) %>%
  rename("mTurk_b_icc" = "V1") %>%
  rownames_to_column("Items")

#write.csv(mTurk_b_item_ICC, here("Baseline", "Outputs", "mTurk_b_item_ICC.csv"), row.names = F)

Exploratory Factor Analysis

Data transformation

26 variables are included. Ordinal variables are not included: “temporal_duration”, “end_state_specificity” and “frequency”; approach_avoidance_R & attainment_maintenance_R are also dropped because these 2 variables are more relevant to the phrasing/content of a goal than to the perception of a goal. This step is consistent with the previous studies.

# Exclude the 10 goal-striving variables named in `goal_striving`
# (the original comment said 8, but the vector holds 10 names)
goalDf_R_wide <- goalDf_wide[, !names(goalDf_wide) %in% goal_striving]

# Exclude 5 further goal representation variables (ordinal or
# phrasing-related), then keep only the rating columns
goal_exclude <- c("temporal_duration", "end_state_specificity_R", "frequency_R", "attainment_maintenance_R", "approach_avoidance_R")
goalDf_EFA <- goalDf_R_wide[, !names(goalDf_R_wide) %in% goal_exclude]
goalDf_EFA <- subset(goalDf_EFA, select = affordance : visibility)

# Generate a correlation matrix (Pearson, pairwise-complete observations)
corrM_raw <- cor(goalDf_EFA, use = "pairwise")

# Export the data and the correlation matrix.
# Fixed: the corrM_raw call previously contained a stray empty argument
# (`), , row.names = F`), which passes a missing argument to write.csv.
write.csv(goalDf_R_wide, here("Baseline", "Outputs", "baseline_goalRap.csv"), row.names = FALSE)
write.csv(corrM_raw, here("Baseline", "Outputs", "baseline_corrRating_raw.csv"), row.names = FALSE)
write.csv(goalDf_EFA, here("Baseline", "Outputs", "baseline_EFA_raw.csv"), row.names = FALSE)

evaluate the number of factors

# use the Very Simple Structure criterion to evaluate 1-10 factor solutions
res_vss <- psych :: nfactors(corrM_raw, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres", 
n.obs=845,title="Very Simple Structure",use="pairwise",cor="cor")

# select useful parameters (MAP + fit indices) and organize them into a table
cbind(1:10, res_vss$map) %>%
  as_tibble() %>%
  rename(., factor = V1, map = V2) %>%
  cbind(., res_vss$vss.stats) %>%
  select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
  kable(format = "html", escape = F, caption = "VSS output -mTurk") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
VSS output -mTurk
factor map fit complex eChisq SRMR eCRMS eBIC eRMS
1 0.0167181 0.5377182 1.000000 5484.6510 0.0999285 0.1041827 3469.5893 0.0999285
2 0.0151357 0.6396585 1.271966 3045.2357 0.0744604 0.0810946 1198.6575 0.0744604
3 0.0144926 0.6217498 1.362641 2077.4962 0.0615014 0.0701224 392.6621 0.0615014
4 0.0154716 0.6290689 1.480419 1408.0995 0.0506328 0.0605843 -121.7299 0.0506328
5 0.0159802 0.6296766 1.701375 870.4132 0.0398087 0.0501236 -511.1508 0.0398087
6 0.0171543 0.6412920 1.741486 505.9865 0.0303518 0.0403383 -734.0514 0.0303518
7 0.0195055 0.6311499 1.916554 358.2234 0.0255383 0.0359511 -747.0278 0.0255383
8 0.0226734 0.6154710 1.932461 272.1391 0.0222592 0.0333248 -705.0647 0.0222592
9 0.0260845 0.6019241 1.935562 180.7963 0.0181430 0.0290235 -675.0994 0.0181430
10 0.0296189 0.5697054 1.840247 118.1485 0.0146666 0.0252101 -623.1785 0.0146666
# Scree plot: identify factors with eigenvalues > 1, and compare against
# the parallel-analysis criterion.

eigenvalues <- eigen(corrM_raw)$values
parallel_crit <- parallel(subject = nrow(goalDf_EFA),
                          var = ncol(goalDf_EFA),
                          rep = 100, cent = .05)
scree_result <- nScree(x = eigenvalues,
                       aparallel = parallel_crit$eigen$qevpea)
plotnScree(scree_result)

Extract factors

# extract 4 factors (minres extraction, promax oblique rotation,
# no squared-multiple-correlation start values)
fa_new_4 <-fa(r=corrM_raw, nfactors=4,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")

Factor loadings

# path diagram of the salient factor loadings
fa.diagram(fa_new_4)

The factors remain the same but some variables have shifted, such as intrinsic motivation

# visualization of the 4-factor loadings
loadings <- fa.sort(fa_new_4)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","Clarity", "External", "Consensus")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")

# Fix item/factor display order. `levels` was previously abbreviated to
# `leve`, which only worked via partial argument matching.
colOrder <- c("Value","Clarity", "External", "Consensus")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)

ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) + 
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") +    # make the bars
  coord_flip() +                 # flip the axes so the item names are horizontal
  # fill color gradient: orange = positive, midnightblue = negative
  scale_fill_gradient2(name = "Loading", 
                       high = "orange", mid = "white", low = "midnightblue", 
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("The Four Factor Solution from Study 5 Baseline") +
  # dotted red line marks the conventional .3 salient-loading cutoff
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") +
  theme_bw(base_size=10)

interfactor correlation:

Less inter-correlated than the previous dataset

# Inter-factor correlations (Phi) of the 4-factor solution.
# Fixed: as.tibble() is deprecated since tibble 2.0.0 (see the warning the
# original emitted) -> as_tibble(); "Cansensus" was a typo of "Consensus".
fa_new_4$Phi %>% 
  as_tibble() %>% 
  dplyr::rename(Value = MR1, Clarity = MR2, External = MR3, Consensus = MR4) %>%
  round(.,2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = F, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
Factor Correlation of mTurk Longitudinal
factor Value Clarity External Cansensus
Value 1.00 0.37 0.33 0.40
Clarity 0.37 1.00 -0.07 0.18
External 0.33 -0.07 1.00 0.14
Cansensus 0.40 0.18 0.14 1.00

Model fit:

Fits less well and has lower cumulative variance explained than the 4-factor model in mTurk 2

# 4-factor model fit summary (chi-square, BIC, absolute fit, RMSEA,
# cumulative variance explained, mean item complexity)
data.frame(sample = "mTurk_l", factors = 4, items = 26, observation = 845, chi = fa_new_4$chi, BIC = fa_new_4$BIC, fit = fa_new_4$fit, RMSEA = fa_new_4$RMSEA[1], cumVar = max(fa_new_4$Vaccounted[3,]), complexity = mean(fa_new_4$complexity)) %>%
  remove_rownames() %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
sample factors items observation chi BIC fit RMSEA cumVar complexity
mTurk_l 4 26 845 1408.1 -219.5637 0.7446622 0.0751402 0.3382828 1.480419
# keep a copy for export (note: sample label "mTurk" differs from the table
# above -- TODO confirm which label is intended)
sona_factorFit <- data.frame(sample = "mTurk", factors = 4, items = 26, observation = 845, chi = fa_new_4$chi, BIC = fa_new_4$BIC, fit = fa_new_4$fit, RMSEA = fa_new_4$RMSEA[1], cumVar = max(fa_new_4$Vaccounted[3,]), complexity = mean(fa_new_4$complexity))

#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)

6 factor model

# extract 6 factors (minres extraction, promax oblique rotation,
# no squared-multiple-correlation start values)
fa_new_6 <-fa(r=corrM_raw, nfactors=6,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")

Factor loadings

# path diagram of the salient factor loadings
fa.diagram(fa_new_6)

factor value, external, attainability, consensus and measurability are similar across datasets. The factor that slip from value factor is different across studies

# visualization of the 6-factor loadings
loadings <- fa.sort(fa_new_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","External", "Attainability", "Instrumentality", "Consensus", "Measurability")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")

# Fix item/factor display order. `levels` was previously abbreviated to
# `leve`, which only worked via partial argument matching.
colOrder <- c("Value","External", "Attainability", "Instrumentality", "Consensus", "Measurability")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)

ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) + 
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") +    # make the bars
  coord_flip() +                 # flip the axes so the item names are horizontal
  # fill color gradient: orange = positive, midnightblue = negative
  scale_fill_gradient2(name = "Loading", 
                       high = "orange", mid = "white", low = "midnightblue", 
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("The Six Factor Solution from Study 5 Baseline") +
  # dotted red line marks the conventional .3 salient-loading cutoff
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") +
  theme_bw(base_size=10)

interfactor correlation:

factor value and instrumentality has the highest correlation

# Inter-factor correlations (Phi) of the 6-factor solution.
# Fixed: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble().
# NOTE(review): the label-to-MR assignment here differs in order from the
# rendered table -- the author flagged the labels as "weird"; verify the
# MR column/label mapping against fa.diagram(fa_new_6).
fa_new_6$Phi %>% 
  as_tibble() %>% 
  dplyr::rename(Value = MR1, External = MR2, Attainability = MR3, Instrumentality = MR4, Consensus = MR5,Measurability = MR6) %>%
  round(.,2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = F, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
Factor Correlation of mTurk Longitudinal
factor Value External Attainability Measurability Instrumentality Consensus
Value 1.00 0.35 0.37 0.49 0.34 0.04
External 0.35 1.00 -0.05 0.12 0.19 -0.19
Attainability 0.37 -0.05 1.00 0.26 0.14 0.12
Measurability 0.49 0.12 0.26 1.00 0.24 0.05
Instrumentality 0.34 0.19 0.14 0.24 1.00 0.27
Consensus 0.04 -0.19 0.12 0.05 0.27 1.00

Model fit:

this 6-factor model fit a bit less than the SONA 2

# 6-factor model fit summary (same indices as the 4-factor table)
data.frame(sample = "mTurk_l", factors = 6, items = 26, observation = 845, chi = fa_new_6$chi, BIC = fa_new_6$BIC, fit = fa_new_6$fit, RMSEA = fa_new_6$RMSEA[1], cumVar = max(fa_new_6$Vaccounted[3,]), complexity = mean(fa_new_6$complexity)) %>%
  remove_rownames() %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
sample factors items observation chi BIC fit RMSEA cumVar complexity
mTurk_l 6 26 845 505.9865 -563.2142 0.8100089 0.0562876 0.4080015 1.741486
# keep a copy for export
sona_factorFit <- data.frame(sample = "mTurk_l", factors = 6, items = 26, observation = 845, chi = fa_new_6$chi, BIC = fa_new_6$BIC, fit = fa_new_6$fit, RMSEA = fa_new_6$RMSEA[1], cumVar = max(fa_new_6$Vaccounted[3,]), complexity = mean(fa_new_6$complexity))

#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)

correlation between factor scores across different levels

I double checked the labels but it’s still very weird

# create a hierarchical factor structure organization linking the
# 4- and 6-factor solutions (Bartlett factor scores, promax rotation)
ba <- bassAckward(corrM_raw, c(4,6), rotate = "promax", scores = "Bartlett", adjust = T, cut = 0.3, use = "pairwise", plot = F)

# re-label the factors with the factor names
ba$labels[[1]] <- c("Value", "Clarity", "External", "Consensus")
ba$labels[[2]] <- c("Value","External", "Attainability", "Instrumentality", "Consensus", "Measurability")

# create the figure (only paths with |r| >= .3 are drawn)
bassAckward.diagram(ba,digits=2,cut = .3,labels=NULL,marg=c(1.5,.5,1.0,.5),
main="BassAckward",items=TRUE,sort=TRUE,lr=F,curves=F,organize=T) 

EFA with polychoric correlation:

because the distribution of several variables were very skewed, it might be better to use polychoric correlation rather than Pearson’s correlation

Data transformation

Generate the correlation matrix with polychoric correlation

# Generate a correlation matrix with polychoric/mixed correlations,
# which handle the strongly skewed rating distributions better than
# Pearson correlations
corrM_pc <- mixedCor(goalDf_EFA)$rho

# export the correlation matrix
write.csv(corrM_pc,here("Baseline", "Outputs", "baseline_corrRating_pc.csv"), row.names = F)

evaluate the number of factors

# use the Very Simple Structure criterion on the polychoric matrix
res_vss <- psych :: nfactors(corrM_pc, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres", 
n.obs=845,title="Very Simple Structure",use="pairwise",cor="cor")

# select useful parameters (MAP + fit indices) and organize them into a table
cbind(1:10, res_vss$map) %>%
  as_tibble() %>%
  rename(., factor = V1, map = V2) %>%
  cbind(., res_vss$vss.stats) %>%
  select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
  kable(format = "html", escape = F, caption = "VSS output -mTurk") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
VSS output -mTurk
factor map fit complex eChisq SRMR eCRMS eBIC eRMS
1 0.0239499 0.6161719 1.000000 7546.9805 0.1172200 0.1222103 5531.9189 0.1172200
2 0.0208194 0.7202552 1.281638 3834.3235 0.0835525 0.0909967 1987.7453 0.0835525
3 0.0191384 0.6835308 1.358198 2586.4517 0.0686226 0.0782418 901.6176 0.0686226
4 0.0197582 0.6974179 1.563847 1698.6159 0.0556112 0.0665413 168.7865 0.0556112
5 0.0196725 0.6773479 1.772831 1049.8206 0.0437192 0.0550475 -331.7434 0.0437192
6 0.0205316 0.6586235 1.788499 625.2586 0.0337400 0.0448412 -614.7794 0.0337400
7 0.0227019 0.6344446 1.948457 443.9486 0.0284303 0.0400222 -661.3027 0.0284303
8 0.0254301 0.6271577 1.938379 322.3286 0.0242250 0.0362679 -654.8752 0.0242250
9 0.0280223 0.6369919 1.950227 211.0743 0.0196035 0.0313597 -644.8215 0.0196035
10 0.0319250 0.5660937 1.947342 132.6798 0.0155424 0.0267155 -608.6472 0.0155424
# Scree plot: factors with eigenvalues > 1, compared against the
# parallel-analysis criterion, for the polychoric correlation matrix

ev <- eigen(corrM_pc)
ap <- parallel(subject=nrow(goalDf_EFA),var=ncol(goalDf_EFA),
  rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

Extract factors

# extract 4 factors from the polychoric matrix (minres, promax rotation)
fa_pc_4 <-fa(r=corrM_pc, nfactors=4,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")

Factor loadings

# path diagram of the salient factor loadings
fa.diagram(fa_pc_4)

The factors remain the same but some variables have shifted, such as intrinsic motivation

# visualization of the 4-factor (polychoric) loadings
loadings <- fa.sort(fa_pc_4)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","Clarity", "External", "Consensus")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")

# Fix item/factor display order. `levels` was previously abbreviated to
# `leve`, which only worked via partial argument matching.
colOrder <- c("Value","Clarity", "External", "Consensus")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)

ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) + 
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") +    # make the bars
  coord_flip() +                 # flip the axes so the item names are horizontal
  # fill color gradient: orange = positive, midnightblue = negative
  scale_fill_gradient2(name = "Loading", 
                       high = "orange", mid = "white", low = "midnightblue", 
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Four Factor Solution from mTurk Longitudinal") +
  # dotted red line marks the conventional .3 salient-loading cutoff
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") +
  theme_bw(base_size=10)

interfactor correlation:

Less inter-correlated than the previous dataset

# Inter-factor correlations (Phi) of the 4-factor polychoric solution.
# Fixed: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble();
# "Cansensus" was a typo of "Consensus".
fa_pc_4$Phi %>% 
  as_tibble() %>% 
  dplyr::rename(Value = MR1, Clarity = MR2, External = MR3, Consensus = MR4) %>%
  round(.,2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = F, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
Factor Correlation of mTurk Longitudinal
factor Value External Clarity Cansensus
Value 1.00 0.40 0.32 0.36
External 0.40 1.00 -0.04 0.13
Clarity 0.32 -0.04 1.00 0.05
Cansensus 0.36 0.13 0.05 1.00

Model fit:

It fits better than the one with pearson

# 4-factor (polychoric) model fit summary
data.frame(sample = "mTurk_l", factors = 4, items = 26, observation = 845, chi = fa_pc_4$chi, BIC = fa_pc_4$BIC, fit = fa_pc_4$fit, RMSEA = fa_pc_4$RMSEA[1], cumVar = max(fa_pc_4$Vaccounted[3,]), complexity = mean(fa_pc_4$complexity)) %>%
  remove_rownames() %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
sample factors items observation chi BIC fit RMSEA cumVar complexity
mTurk_l 4 26 845 1698.616 515.3963 0.8242109 0.0973533 0.4096362 1.563847
# keep a copy for export (note: sample label "mTurk" differs from the table
# above -- TODO confirm which label is intended)
longitudinal_factorFit <- data.frame(sample = "mTurk", factors = 4, items = 26, observation = 845, chi = fa_pc_4$chi, BIC = fa_pc_4$BIC, fit = fa_pc_4$fit, RMSEA = fa_pc_4$RMSEA[1], cumVar = max(fa_pc_4$Vaccounted[3,]), complexity = mean(fa_pc_4$complexity))

#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)

6 factor model

# extract 6 factors from the polychoric matrix (minres, promax rotation)
fa_pc_6 <-fa(r=corrM_pc, nfactors=6,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")

Factor loadings

# path diagram of the salient factor loadings
fa.diagram(fa_pc_6)

# visualization of the 6-factor (polychoric) loadings
loadings <- fa.sort(fa_pc_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","External", "Attainability", "Consensus", "Measurability", "Instrumentality")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")

# Fix item/factor display order. `levels` was previously abbreviated to
# `leve`, which only worked via partial argument matching.
colOrder <- c("Value","External", "Attainability", "Consensus", "Measurability", "Instrumentality")
rowOrder <- rev(rownames(loadings))
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)

ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) + 
  facet_wrap(~ Factor, nrow=1) + # place the factors in separate facets
  geom_bar(stat="identity") +    # make the bars
  coord_flip() +                 # flip the axes so the item names are horizontal
  # fill color gradient: orange = positive, midnightblue = negative
  scale_fill_gradient2(name = "Loading", 
                       high = "orange", mid = "white", low = "midnightblue", 
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Six Factor Solution from mTurk Longitudinal") +
  # dotted red line marks the conventional .3 salient-loading cutoff
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") +
  theme_bw(base_size=10)

interfactor correlation:

factor value and instrumentality has the highest correlation

# Inter-factor correlations (Phi) of the 6-factor polychoric solution.
# Fixed: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble().
fa_pc_6$Phi %>% 
  as_tibble() %>% 
  dplyr::rename(Value = MR1, External = MR2, Attainability = MR3, Consensus = MR4, Measurability = MR5,Instrumentality = MR6) %>%
  round(.,2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = F, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
Factor Correlation of mTurk Longitudinal
factor Value External Attainability Consensus Measurability Instrumentality
Value 1.00 0.27 0.39 0.43 0.16 0.51
External 0.27 1.00 -0.09 0.16 -0.18 0.04
Attainability 0.39 -0.09 1.00 0.22 0.17 0.26
Consensus 0.43 0.16 0.22 1.00 0.40 0.36
Measurability 0.16 -0.18 0.17 0.40 1.00 0.13
Instrumentality 0.51 0.04 0.26 0.36 0.13 1.00

Model fit:

it fits better than the one with Pearson

# 6-factor (polychoric) model fit summary
data.frame(sample = "mTurk_l", factors = 6, items = 26, observation = 845, chi = fa_pc_6$chi, BIC = fa_pc_6$BIC, fit = fa_pc_6$fit, RMSEA = fa_pc_6$RMSEA[1], cumVar = max(fa_pc_6$Vaccounted[3,]), complexity = mean(fa_pc_6$complexity)) %>%
  remove_rownames() %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
sample factors items observation chi BIC fit RMSEA cumVar complexity
mTurk_l 6 26 845 625.2586 -124.0245 0.8781573 0.0774146 0.4862908 1.788499
# keep a copy for export
longitudinal_factorFit <- data.frame(sample = "mTurk_l", factors = 6, items = 26, observation = 845, chi = fa_pc_6$chi, BIC = fa_pc_6$BIC, fit = fa_pc_6$fit, RMSEA = fa_pc_6$RMSEA[1], cumVar = max(fa_pc_6$Vaccounted[3,]), complexity = mean(fa_pc_6$complexity))

#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)

correlation between factor scores across different levels

I double-checked the factor labels, but the pattern of cross-level correlations still looks odd.

# create a hierarchical factor structure organization
# bassAckward() (psych) extracts the 4- and 6-factor solutions from the same
# polychoric correlation matrix (promax rotation, Bartlett factor scores) and
# correlates the factor scores across the two levels; loadings below 0.3 are cut.
ba <- bassAckward(corrM_pc, c(4,6), rotate = "promax", scores = "Bartlett", adjust = T, cut = 0.3, use = "pairwise", plot = F)

# re-label the factors with the factor names
# NOTE(review): label order must match the factor order returned for each
# solution (4-factor then 6-factor) — confirm against the loading tables above.
ba$labels[[1]] <- c("Value", "Clarity", "External", "Consensus")
ba$labels[[2]] <- c("Value","External", "Attainability", "Consensus", "Measurability", "Instrumentality")

# create the figure
# draw the hierarchical diagram linking the 4-factor level to the 6-factor level
bassAckward.diagram(ba,digits=2,cut = .3,labels=NULL,marg=c(1,0.2,1.0,0.2),
main="BassAckward",items=TRUE,sort=TRUE,lr=F,curves=F,organize=T) 

Factor Scores

I will use the output from the polychoric correlation for the following analysis

4-factor:

Evaluation of Indeterminacy

Evaluate the indeterminacy of each factor using the squared multiple correlation between each factor and the original variables (ranges from 0 to 1).

fa_pc_4$R2
## [1] 0.9124096 0.8373077 0.8067033 0.6875801

Evaluate the indeterminacy of each factor based on the minimum possible correlation between two sets of competing factor scores, $2\rho^2 - 1$ (ranges from $-1$ to $1$).

2 * fa_pc_4$R2 -1
## [1] 0.8248192 0.6746154 0.6134067 0.3751601

Unweighted factor-based scores

# use the 4-factor model
# use the 4-factor model
# Unit-weighted factor-based scores: each factor score is the mean of its items,
# with negatively loading items subtracted. rowwise() was removed because every
# operation is element-wise column arithmetic, so a plain vectorized mutate()
# produces identical values, runs much faster, and avoids returning a rowwise_df.
factorScoreDf_4f <- goalDf_R_wide %>%
  mutate(Value = (attractiveness_achievement + attractiveness_progress + ideal_motivation + importance + construal_level + identified_motivation + meaningfulness + 
                  instrumentality + difficulty + connectedness) / 10,
         Clarity = (clarity + attainability + measurability + affordance + specificity - conflict + control) / 7,
         External = (ought_motivation + external_motivation + external_importance + introjected_motivation + visibility) / 5,
         Consensus = (-intrinsic_motivation + commonality + basic_needs + social_desirability) / 4) %>%
  select(MTurkCode, goal, goalType, Value, Clarity, External, Consensus)

# use the 6-factor model
# use the 6-factor model
# Unit-weighted factor-based scores for the 6-factor solution (negatively
# loading items subtracted). rowwise() was removed for the same reason as the
# 4-factor version: the arithmetic is element-wise on columns, so vectorized
# mutate() gives identical values without the rowwise overhead.
factorScoreDf_6f <- goalDf_R_wide %>%
  mutate(Value = (attractiveness_achievement + attractiveness_progress  + importance + construal_level + identified_motivation + ideal_motivation + meaningfulness) / 7,
         External = (ought_motivation + external_motivation + introjected_motivation + external_importance + conflict + visibility) / 6,
         Attainability = (attainability - difficulty + clarity + affordance) /4,
         Consensus = (commonality + basic_needs + social_desirability - intrinsic_motivation) / 4,
         Measurability = (specificity + measurability) / 2,
         Instrumentality = (connectedness + instrumentality + control) / 3) %>%
  select(MTurkCode, goal, goalType, Value, External, Attainability, Consensus, Measurability, Instrumentality)

# write the csv factor score datasets
# Persist both sets of unit-weighted factor scores so downstream analyses can
# load them without re-running the EFA.
write.csv(factorScoreDf_4f, here("Baseline", "Outputs", "factorScoreDf_4f.csv"), row.names = F)
write.csv(factorScoreDf_6f, here("Baseline", "Outputs", "factorScoreDf_6f.csv"), row.names = F)

Unit Scores (after z-score)

# transform to z-score
# scale() returns a one-column matrix, which would leave every standardized
# column of the result as a matrix column (a common footgun for downstream
# joins/models); as.numeric() strips the matrix attributes while keeping the
# values identical.
goalDf_EFA_z <- goalDf_EFA %>%
  mutate(across(where(is.numeric), ~ as.numeric(scale(.x, center = TRUE, scale = TRUE))))